{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 基于scikit包中的创建模拟数据的API创建聚类数据对K-Means算法和MiniBatch K-Means算法构建的模型进行评估"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- [K-means官方文档](http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn.cluster.KMeans)\n",
    "- [MiniBatchKMeans官方文档](http://scikit-learn.org/stable/modules/generated/sklearn.cluster.MiniBatchKMeans.html#sklearn.cluster.MiniBatchKMeans)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import time\n",
    "import numpy as np  \n",
    "import matplotlib.pyplot as plt  \n",
    "import matplotlib as mpl\n",
    "from sklearn.cluster import MiniBatchKMeans, KMeans \n",
    "from sklearn import metrics\n",
    "from sklearn.metrics.pairwise import pairwise_distances_argmin  \n",
    "from sklearn.datasets.samples_generator import make_blobs  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "## 设置属性防止中文乱码\n",
    "mpl.rcParams['font.sans-serif'] = [u'SimHei']\n",
    "mpl.rcParams['axes.unicode_minus'] = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "centers = [[1, 1], [-1, -1], [1, -1]] \n",
    "clusters = len(centers)       \n",
    "\n",
    "X, Y = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7, random_state=28)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-Means算法模型训练消耗时间:0.1640s\n"
     ]
    }
   ],
   "source": [
    "k_means = KMeans(init='k-means++', n_clusters=clusters, random_state=28)\n",
    "t0 = time.time() \n",
    "k_means.fit(X)  \n",
    "km_batch = time.time() - t0  \n",
    "print (\"K-Means算法模型训练消耗时间:%.4fs\" % km_batch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mini Batch K-Means算法模型训练消耗时间:0.0280s\n"
     ]
    }
   ],
   "source": [
    "batch_size = 100\n",
    "mbk = MiniBatchKMeans(init='k-means++', n_clusters=clusters, batch_size=batch_size, random_state=28)  \n",
    "t0 = time.time()  \n",
    "mbk.fit(X)  \n",
    "mbk_batch = time.time() - t0  \n",
    "print (\"Mini Batch K-Means算法模型训练消耗时间:%.4fs\" % mbk_batch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 2 2 ..., 1 1 0]\n",
      "[1 0 0 ..., 2 2 1]\n"
     ]
    }
   ],
   "source": [
    "km_y_hat = k_means.labels_\n",
    "mbkm_y_hat = mbk.labels_\n",
    "print(km_y_hat)\n",
    "print(mbkm_y_hat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-Means算法聚类中心点:\n",
      "center= [[-1.0600799  -1.05662982]\n",
      " [ 1.02975208 -1.07435837]\n",
      " [ 1.01491055  1.02216649]]\n",
      "Mini Batch K-Means算法聚类中心点:\n",
      "center= [[ 0.99602094  1.10688195]\n",
      " [-1.00828286 -1.05983915]\n",
      " [ 1.07892315 -0.94286826]]\n"
     ]
    }
   ],
   "source": [
    "k_means_cluster_centers = k_means.cluster_centers_\n",
    "mbk_means_cluster_centers = mbk.cluster_centers_\n",
    "print (\"K-Means算法聚类中心点:\\ncenter=\", k_means_cluster_centers)\n",
    "print (\"Mini Batch K-Means算法聚类中心点:\\ncenter=\", mbk_means_cluster_centers)\n",
    "order = pairwise_distances_argmin(k_means_cluster_centers,  \n",
    "                                  mbk_means_cluster_centers) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-Means算法:adjusted_rand_score评估函数计算结果值:0.72526；计算消耗时间:0.079s\n",
      "Mini Batch K-Means算法:adjusted_rand_score评估函数计算结果值:0.72421；计算消耗时间:0.001s\n",
      "\n",
      "K-Means算法:v_measure_score评估函数计算结果值:0.65754；计算消耗时间:0.035s\n",
      "Mini Batch K-Means算法:v_measure_score评估函数计算结果值:0.65780；计算消耗时间:0.003s\n",
      "\n",
      "K-Means算法:adjusted_mutual_info_score评估函数计算结果值:0.65726；计算消耗时间:0.047s\n",
      "Mini Batch K-Means算法:adjusted_mutual_info_score评估函数计算结果值:0.65757；计算消耗时间:0.005s\n",
      "\n",
      "K-Means算法:mutual_info_score评估函数计算结果值:0.72231；计算消耗时间:0.001s\n",
      "Mini Batch K-Means算法:mutual_info_score评估函数计算结果值:0.72264；计算消耗时间:0.001s\n",
      "\n"
     ]
    }
   ],
   "source": [
    "### 聚类效果评估\n",
    "score_funcs = [\n",
    "    metrics.adjusted_rand_score,# 兰德指数，计算样本预测值和真实值之间的相似度\n",
    "    metrics.v_measure_score, # 均一性和完整性的加权平均\n",
    "    metrics.adjusted_mutual_info_score, # 调整后的互信息\n",
    "    metrics.mutual_info_score, # 互信息\n",
    "]\n",
    "\n",
    "## 2. 迭代对每个评估函数进行评估操作\n",
    "for score_func in score_funcs:\n",
    "    t0 = time.time()\n",
    "    km_scores = score_func(Y,km_y_hat)\n",
    "    print(\"K-Means算法:%s评估函数计算结果值:%.5f；计算消耗时间:%0.3fs\" % (score_func.__name__,km_scores, time.time() - t0))\n",
    "    \n",
    "    t0 = time.time()\n",
    "    mbkm_scores = score_func(Y,mbkm_y_hat)\n",
    "    print(\"Mini Batch K-Means算法:%s评估函数计算结果值:%.5f；计算消耗时间:%0.3fs\\n\" % (score_func.__name__,mbkm_scores, time.time() - t0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
